I’m in the midst of reading Lewis Hyde’s Trickster Makes This World. It’s great, and I want to see whether the themes it identifies can be substantiated by existing folklore motifs and tale types.
First, we can try a simple query using the word “trickster”:
# Motifs whose name mentions "trickster" (case-insensitive), keeping only the
# columns from `id` through `level`.
trickster <- tmi %>%
  filter(
    str_detect(motif_name, regex("trickster", ignore_case = TRUE))
  ) %>%
  select(id:level)
Next, let’s expand to look at any motif involving tricks. We’re going
to use regular expressions (regex, for short) to identify patterns in
text, and we’ll save them in a list object:
# Named collection of search patterns, one entry per theme, so later steps can
# look a pattern up by name.
regexes <- list()

# "trick" or "tricks" as a whole word. Word boundaries (\\b) match the word
# when it is the entire motif name or sits next to punctuation ("trick:",
# "trick,", "(tricks)"), cases that a pattern requiring surrounding spaces
# would miss. Longer words such as "trickster" or "trickery" are still
# excluded.
regexes$trick <- "\\btricks?\\b"

# Motifs whose name contains the word "trick(s)", case-insensitively.
tricks <-
  tmi %>%
  filter(str_detect(motif_name, regex(regexes$trick, ignore_case = TRUE)))

# The result of the earlier single-word "trickster" query is no longer needed.
rm(trickster)
In his book, Hyde identifies a number of other themes that accompany trickster tales, such as bait and traps, theft, shape-shifting and transformation, and borders and crossroads.
We can make some regexes as search strategies, but keep in mind that these should be refined by looking at the data that’s returned:
# Theme patterns; each is a set of alternatives separated by "|" and is matched
# anywhere inside a motif name.
# NOTE(review): substring matching is broad -- e.g. "rob" also matches "robe"
# and "trap" also matches "strap" -- so inspect the hits before relying on them.
regexes[["bait"]] <- "bait|trap|snare|catch| net "
regexes[["steal"]] <- "steal|rob"
regexes[["transform"]] <- "shape-shift|in.*shape|take.*shape|assume.*shape|transform"
regexes[["border"]] <- "border|boundary|boundaries|cross-roads"
We can add to these anytime (after all, I haven’t even finished
the book yet), but let’s start here and develop a subset of the
tmi which contains trickster-related motifs.
# Subset of `tmi` holding every motif whose name matches at least one
# trickster-related theme. One logical flag column is added per theme, plus
# `any`, the number of themes the motif matched.
tricks <-
  tmi %>%
  select(id:chapter_id) %>%
  mutate(
    # str_detect() already returns TRUE/FALSE/NA, so no if_else() wrapper is
    # needed to turn the result into a logical flag.
    trickster = str_detect(motif_name, regex("trickster", ignore_case = TRUE)),
    trick = str_detect(motif_name, regex(regexes$trick, ignore_case = TRUE)),
    bait = str_detect(motif_name, regex(regexes$bait, ignore_case = TRUE)),
    steal = str_detect(motif_name, regex(regexes$steal, ignore_case = TRUE)),
    transform = str_detect(
      motif_name,
      regex(regexes$transform, ignore_case = TRUE)
    ),
    border = str_detect(motif_name, regex(regexes$border, ignore_case = TRUE)),
    # Vectorised count of matched themes (logicals add as 0/1). This replaces a
    # row-by-row sum; an NA flag still yields NA, and such rows are dropped by
    # the filter below exactly as before.
    any = trickster + trick + bait + steal + transform + border
  ) %>%
  filter(any > 0)
What are the most common motifs in tales that include trickster themes?
# For each motif, count the distinct tales (`atu_id`) it appears in, attach the
# motif's name from `tmi`, and display the 15 motifs found in the most tales
# (ties are not expanded).
trickster_tales %>%
  group_by(motif) %>%
  summarize(n = n_distinct(atu_id)) %>%
  left_join(
    select(tmi, id, motif_name),
    by = c("motif" = "id")
  ) %>%
  select(motif, motif_name, n) %>%
  slice_max(order_by = n, n = 15, with_ties = FALSE) %>%
  rmarkdown::paged_table()
What if we group by the sections in which these motifs occur?
# Same idea at the section level: count the distinct tales per `level_1`
# section, label each section with the name of its level-1 entry in `tmi`, and
# display the 30 sections found in the most tales (ties are not expanded).
trickster_tales %>%
  group_by(level_1) %>%
  summarize(n_tales = n_distinct(atu_id)) %>%
  left_join(
    select(filter(tmi, level == 1), level_1, motif_name),
    by = "level_1"
  ) %>%
  select(level_1, motif_name, n_tales) %>%
  slice_max(order_by = n_tales, n = 30, with_ties = FALSE) %>%
  rmarkdown::paged_table()
Observations:
To look at all motif co-occurrences, we can make a network:
library(tidygraph); library(visNetwork)
# Node table: one row per `level_1` section present in `trickster_tales`.
#   id    - row number, used as the node index by the edge table
#   label - the section code (`level_1`)
#   title - the name of the section's level-1 entry in `tmi`
nodes <-
  distinct(trickster_tales, level_1) %>%
  left_join(
    select(filter(tmi, level == 1), level_1, motif_name),
    by = "level_1"
  ) %>%
  mutate(id = row_number()) %>%
  select(id, label = level_1, title = motif_name)
# Edge table linking sections that appear in the same tale.
# Within each tale, every row's section is paired with the section on the next
# row, and the last row wraps around to the first; pairs of identical sections
# are dropped. Each remaining (from, to) pair is weighted by the number of
# distinct tales it occurs in, and the section codes are then swapped for the
# matching node ids.
edges <-
  trickster_tales %>%
  select(atu_id, from = level_1) %>%
  group_by(atu_id) %>%
  mutate(to = lead(from, default = from[1])) %>%
  filter(from != to) %>%
  group_by(from, to) %>%
  summarise(n = n_distinct(atu_id)) %>%
  ungroup() %>%
  # Look up the node id for each endpoint by its section code.
  left_join(
    select(nodes, from_id = id, label),
    by = c("from" = "label")
  ) %>%
  left_join(
    select(nodes, to_id = id, label),
    by = c("to" = "label")
  ) %>%
  select(from = from_id, to = to_id, width = n)
# Build the graph and attach the measures used for display.
graph <-
  tbl_graph(nodes = nodes, edges = edges) %>%
  # Node measures: betweenness and eigenvector centrality, plus a grouping
  # from group_edge_betweenness().
  activate(nodes) %>%
  mutate(
    btwn = centrality_betweenness(),
    eigen = centrality_eigen(),
    group = group_edge_betweenness()
  ) %>%
  # `value` is betweenness rescaled to the range 0-10.
  mutate(value = scales::rescale(btwn, to = c(0, 10))) %>%
  # Edge measures: edge betweenness, rescaled to 0.5-6 and stored as `width`
  # (this replaces the `width` column carried in from the edge table).
  activate(edges) %>%
  mutate(e_btwn = centrality_edge_betweenness()) %>%
  mutate(width = scales::rescale(e_btwn, to = c(0.5, 6)))

# The standalone tables are no longer needed once they live inside `graph`.
rm(nodes, edges)
And graph it interactively:
library(visNetwork)
# Convert the graph into visNetwork's node and edge tables, then draw it with
# arrows placed mid-edge and an igraph "layout_with_fr" layout.
network <- toVisNetworkData(graph)
visNetwork(
  nodes = network[["nodes"]],
  edges = network[["edges"]]
) %>%
  visEdges(arrows = "middle") %>%
  visIgraphLayout(layout = "layout_with_fr")
Some variations: assign a different measure to
value to resize nodes,
to the width argument to resize edges, or to the
group argument to change the colors.

We can also inspect the node and edge measures in a table:
# Open the node table (with its computed measures) in the data viewer.
graph %>%
  activate(nodes) %>%
  as_tibble() %>%
  View()
We can also explore more abstract concepts present within the tales
by mapping their language to standardized lexicons. First, we need to
unpack (a.k.a. tokenize) the data so it has one word per row.
This time, we’ll use the longer tale descriptions from the
atu_df:
library(tidytext)
# One row per word: keep the tales listed in `tale_list`, then split each
# tale's `tale_type` text into word tokens stored in a `word` column.
tale_words <-
  atu_df %>%
  filter(atu_id %in% tale_list) %>%
  select(atu_id, tale_type) %>%
  unnest_tokens(output = word, input = tale_type)
Now we’ll need to get some lexicons and join them to our data. We’ll start with the NRC Valence, Arousal, and Dominance (VAD) lexicon and a lexicon of personal values:
library(textdata)
# Lexicon returned by textdata's lexicon_nrc_vad().
vad <- lexicon_nrc_vad()
# eil <- lexicon_nrc_eil()

# Values lexicon, read from two parts of the same tab-delimited file:
#   - rows after the first 12 lines: word / code pairs
#   - the 10 rows after the first line: code / value pairs
# NOTE(review): the skip/n_max offsets assume this exact file layout -- confirm
# against value_lexicon.txt.
values <-
  read_delim(
    file = "value_lexicon.txt",
    delim = "\t",
    skip = 12,
    col_names = c("word", "code")
  ) %>%
  left_join(
    read_delim(
      file = "value_lexicon.txt",
      delim = "\t",
      skip = 1,
      n_max = 10,
      col_names = c("code", "value")
    ),
    by = "code"
  ) %>%
  mutate(
    # Strip the "Values_" prefix, leaving the two-letter abbreviation.
    value = str_remove(value, "^Values_"),
    # Spell each abbreviation out; anything unlisted becomes NA.
    value_desc = case_match(
      value,
      "AC" ~ "achievement",
      "BE" ~ "benevolence",
      "CO" ~ "conformity",
      "HE" ~ "hedonism",
      "PO" ~ "power",
      "SD" ~ "self-direction",
      "SE" ~ "security",
      "ST" ~ "stimulation",
      "TR" ~ "tradition",
      "UN" ~ "universalism"
    )
  ) %>%
  select(-code)
# Attach both lexicons to the tokenized tales. Left joins keep every word, so
# words missing from a lexicon get NA in that lexicon's columns.
tale_words <-
  tale_words %>%
  left_join(vad, by = c("word" = "Word")) %>%
  # Name the key explicitly instead of relying on a natural join, so the match
  # cannot silently widen if the two tables ever share another column name.
  left_join(values, by = "word")
We can compare the average valence (pos/neg), arousal (active/bored) and dominance (strong/weak) of the tales’ language:
# Scatter plot of each tale's average valence (x) against average arousal (y),
# coloured by average dominance; `label` supplies the tale id and name.
p <-
  tale_words %>%
  group_by(atu_id) %>%
  # Words absent from the lexicon have NA scores and are ignored in the means.
  summarise(
    avg_valence = mean(Valence, na.rm = TRUE),
    avg_arousal = mean(Arousal, na.rm = TRUE),
    avg_dominance = mean(Dominance, na.rm = TRUE)
  ) %>%
  # Explicit key rather than a natural join. summarise() over the single
  # grouping column already returns an ungrouped table, so no ungroup() is
  # needed afterwards.
  left_join(select(atu_df, atu_id, tale_name), by = "atu_id") %>%
  mutate(label = paste0(atu_id, ": ", tale_name)) %>%
  ggplot(
    aes(
      x = avg_valence,
      y = avg_arousal,
      color = avg_dominance,
      label = label
    )
  ) +
  geom_point() +
  scale_color_viridis_c() +
  theme_minimal()

# Render the plot as an interactive widget.
ggplotly(p)
Or we can check to see which values are highlighted in trickster tales, since these tales are considered by Hyde to be amoral and we might thus expect to see an even dispersion:
# Bar chart of how often each value category occurs among the tale words.
# NOTE(review): `pct` is computed before the NA category is removed, so the
# denominator includes words with no value match and the bars do not sum to
# 100 -- confirm this is the intended base.
p <-
  tale_words %>%
  count(value_desc) %>%
  mutate(pct = round(n / sum(n) * 100, digits = 1)) %>%
  filter(!is.na(value_desc)) %>%
  ggplot(aes(x = value_desc, y = pct)) +
  geom_col() +
  theme_minimal()

# Render the plot as an interactive widget.
ggplotly(p)
Observations:
atu as a whole.
This graph just shows us their relationship within the subset of tales
we filtered for.

A few additional resources for text analysis in R: